library(Seurat)
library(tidyverse)
library(clusterProfiler)
library(cowplot)
library(ggplot2)
library(clusterProfiler)
library(patchwork)
library(dplyr)
library(sccomp)
library(forcats)
library(tidyr)
library(lme4)
library(emmeans)
library(ggpubr)
library(decoupleR)
library(OmnipathR)
library(grid)
merged_obj <- readRDS("/Users/xu/Desktop/1_project/major celltype/merged_obj.rds")

all.genes <- rownames(merged_obj)
fibroblast <- subset(merged_obj,subset = merged_annotation=="fibroblast")
fibroblast <- RunPCA(fibroblast,features = all.genes)
Seurat::ElbowPlot(fibroblast,ndims=50,reduction = "pca")

fibroblast <- FindNeighbors(fibroblast, dims = 1:20, reduction = "pca")
fibroblast <- FindClusters(fibroblast, resolution = 1,algorithm = 4)
fibroblast <- RunUMAP(fibroblast, dims = 1:20, reduction = "pca")

DimPlot(fibroblast, reduction="umap",group.by = c("seurat_clusters"),alpha = 0.2,label = TRUE)

cluster5<- subset(fibroblast,subset = seurat_clusters==5)
cluster5 <- FindClusters(cluster5,resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2036
## Number of edges: 59344
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7049
## Number of communities: 3
## Elapsed time: 0 seconds
DimPlot(cluster5)

cluster5$subpopulation <- "unknown"
cluster5$subpopulation[cluster5$seurat_clusters%in%c(1)] <- "CCL19+APOE+"
cluster5$subpopulation[cluster5$seurat_clusters==2] <- "ACTA2+TAGLN+"
cluster5$subpopulation[cluster5$seurat_clusters==0] <- "IGFBP4+"

fibroblast$subpopulation <- "unknown"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(1,2)] <- "Universal"  
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(6)] <- "CCL19+APOE+"  
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(11)] <- "CCL8+"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(3,12)] <- "IGFBP4+"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(4)] <- "Superficial"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(7)] <- "S100A4+"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(8)] <- "COL8A1+"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(9)] <- "ACTA2+TAGLN+"
fibroblast$subpopulation[fibroblast$seurat_clusters%in%c(10)] <- "TNN+COCH+"
fibroblast$subpopulation <- cluster5$subpopulation
table(fibroblast$subpopulation)
## 
## ACTA2+TAGLN+  CCL19+APOE+        CCL8+      COL8A1+      IGFBP4+      S100A4+ 
##         1619         2425          509         1487         3753         1592 
##  Superficial    TNN+COCH+    Universal 
##         2184          820         5586
DimPlot(fibroblast,group.by = "subpopulation")

gen_sorted <- c(
  "CD34","PI16",
  "COL18A1","COL23A1","APCDD1","NKD2","WIF1",
  "CCL8","CXCL12","ACKR3","SFRP4","COMP","COL8A1","FAP"
  ,"CCL19","APOE"
  , "SPARC","IGFBP4"
  ,"ACTA2","TAGLN"
  ,"S100A4"
 ,"TNMD","TNN","COCH")
color <-c('#E5D2DD', '#53A85F')
p <- DotPlot(fibroblast, features = gen_sorted,cols = color, group.by = "subpopulation"#, split.by = "group"
)+coord_flip()
exp <- p$data
exp$features.plot <- as.factor(exp$features.plot)
exp$features.plot <- fct_inorder(exp$features.plot)
desired_order <- c("Universal","Superficial","CCL8+","COL8A1+","CCL19+APOE+","IGFBP4+","ACTA2+TAGLN+", "S100A4+","TNN+COCH+")
exp$id <- factor(exp$id, levels = desired_order)
colnames(exp)[5] <- 'AverageExpression'
colnames(exp)[2] <- 'Percent'

cell_types <- levels(exp$id)  
xintercepts <- seq(1.5, length(cell_types) - 0.5, by=1)  

dotplot <- ggplot(exp, aes(x=id, y=features.plot))+
  geom_point(aes(size= Percent, color=AverageExpression))+  
  geom_point(aes(size=Percent, color=AverageExpression), shape=21, color="black", stroke=1)+  
  scale_size_continuous(range = c(2,6))+  
  theme(panel.grid = element_blank(),  
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        panel.background = element_rect(color = 'black', size = 1.2, fill = 'transparent'), 
        axis.text.x = element_text(size=11, color="black", angle=45, hjust = 1),  
        axis.text.y = element_text(size=11, color="black"))+  
  scale_color_gradientn(colors = colorRampPalette(c("#3492b2","#4fc3b0","#f8bb6e","#c22f2f"))(100))+  
  labs(x=NULL, y=NULL)+ 
  geom_vline(xintercept = xintercepts, linetype="dotted", size=0.8)
dotplot

matrisome <- readr::read_tsv('/Users/xu/Desktop/1_project/fibroblast/ecm/Hs_Matrisome_Masterlist_Naba et al_2012.xlsx - Hs_Matrisome_Masterlist.tsv')
ecm_gene <- matrisome %>% filter(`Matrisome Division` == "Core matrisome")

`%notin%` <- Negate(`%in%`)
all(ecm_gene$`Gene Symbol` %in% rownames(fibroblast))
## [1] FALSE
ecm_gene$`Gene Symbol`[which(ecm_gene$`Gene Symbol` %notin% rownames(fibroblast))]
##   [1] "AGRN"     "AMBN"     "AMELX"    "AMELY"    "BGLAP"    "BSPH1"   
##   [7] "CDCP2"    "CILP"     "CRELD1"   "CRELD2"   "CRIM1"    "CRISPLD1"
##  [13] "CRISPLD2" "CTGF"     "CYR61"    "DDX26B"   "DMBT1"    "DMP1"    
##  [19] "DSPP"     "ECM1"     "ECM2"     "EFEMP2"   "EGFLAM"   "EMID1"   
##  [25] "EMILIN1"  "EMILIN2"  "EMILIN3"  "EYS"      "FBLN1"    "FBLN7"   
##  [31] "FBN2"     "FBN3"     "FGA"      "FGB"      "FGG"      "FGL1"    
##  [37] "FGL2"     "FNDC7"    "FNDC8"    "HMCN2"    "IBSP"     "IGFALS"  
##  [43] "IGFBP3"   "IGFBP5"   "IGFBP6"   "IGFBP7"   "IGFBPL1"  "IGSF10"  
##  [49] "KAL1"     "KCP"      "LAMA1"    "LAMA2"    "LAMA4"    "LAMA5"   
##  [55] "LAMB1"    "LAMB2"    "LAMB4"    "LAMC3"    "LGI1"     "LGI2"    
##  [61] "LGI3"     "LGI4"     "LTBP3"    "LTBP4"    "MATN1"    "MATN2"   
##  [67] "MATN3"    "MATN4"    "MEPE"     "MFAP1"    "MFAP2"    "MFAP3"   
##  [73] "MGP"      "MMRN1"    "MXRA5"    "NELL2"    "NID1"     "NID2"    
##  [79] "NOV"      "NTN3"     "NTN5"     "NTNG1"    "OTOG"     "OTOL1"   
##  [85] "PAPLN"    "PCOLCE"   "PCOLCE2"  "POMZP3"   "PXDN"     "PXDNL"   
##  [91] "RSPO1"    "RSPO4"    "SBSPON"   "SLIT1"    "SNED1"    "SPARCL1" 
##  [97] "SPP1"     "SRPX2"    "SSPO"     "TECTA"    "TECTB"    "TGFBI"   
## [103] "THBS3"    "THBS4"    "THSD4"    "TNFAIP6"  "TSKU"     "TSPEAR"  
## [109] "VIT"      "VTN"      "VWA1"     "VWA2"     "VWA3A"    "VWA3B"   
## [115] "VWA5A"    "VWA5B1"   "VWA5B2"   "VWA7"     "VWA9"     "VWCE"    
## [121] "VWDE"     "WISP1"    "WISP2"    "WISP3"    "ZP1"      "ZP2"     
## [127] "ZP3"      "ZP4"      "ZPLD1"    "COL11A2"  "COL13A1"  "COL16A1" 
## [133] "COL19A1"  "COL20A1"  "COL21A1"  "COL22A1"  "COL24A1"  "COL26A1" 
## [139] "COL27A1"  "COL28A1"  "COL3A1"   "COL4A6"   "COL5A3"   "COL6A1"  
## [145] "COL6A2"   "COL6A3"   "COL6A5"   "COL6A6"   "COL8A2"   "COL9A1"  
## [151] "COL9A2"   "COL9A3"   "ACAN"     "CHADL"    "DCN"      "EPYC"    
## [157] "HAPLN1"   "HAPLN2"   "HAPLN3"   "HAPLN4"   "IMPG2"    "KERA"    
## [163] "NCAN"     "OGN"      "OMD"      "OPTC"     "PODN"     "PODNL1"  
## [169] "PRELP"    "PRG3"     "SPOCK2"   "SPOCK3"   "SRGN"     "VCAN"
ecm_gene[ecm_gene == 'DDX26B'] <- 'INTS6L'
ecm_gene[ecm_gene == 'KAL1'] <- 'ANOS1'
ecm_gene[ecm_gene == 'VWA9'] <- 'INTS14'

ecm_list <- split(ecm_gene$`Gene Symbol`, ecm_gene$`Matrisome Category`)
names(ecm_list)[names(ecm_list) == "ECM Glycoproteins"] <- "Glycoproteins"
ecm_list[["ECM"]] <- pull(ecm_gene, `Gene Symbol`)
fibroblast <- AddModuleScore(fibroblast, ecm_list, name = 'ECM_score', ctrl = 10)
for (j in seq_along(ecm_list)) {
  colnames(fibroblast@meta.data)[which(colnames(fibroblast@meta.data) == paste0('ECM_score', j))] <- paste0(names(ecm_list)[j], '_score')
}

##
library(ggplot2)
library(ggpubr)
library(gghalves)
library(ggrepel)
ordercolors<- c("#84c3b7","#b8aeeb","#f57c6e","#f8bb6e","#88d8db","#f2a7da","#add187","#8481ba","#e74434","#f8c7b4","#71b7ed", "#cedfef","#1864aa","#ee7e18","#4a94c6","#fae69e")


selected_columns <- c("fourgroups", "subpopulation" ,"ECM_score","Collagens_score","Glycoproteins_score","Proteoglycans_score") 
metadata_df <- fibroblast@meta.data[, selected_columns] %>% as.data.frame()
metadata_df$fourgroups <- factor(metadata_df$fourgroups,
                    levels = c("SSc_preCART" , "SSc_postCART_early","SSc_postCART_late", "SSc_postCART_superlate"))
colnames(metadata_df) <- gsub("-", "_", colnames(metadata_df))  

score_names <- c("ECM_score", "Collagens_score", "Glycoproteins_score", "Proteoglycans_score")

plots <- lapply(score_names, function(score) {
  set.seed(123) 
  ggplot(data = metadata_df, aes(x = subpopulation, y = .data[[score]], fill = subpopulation)) +
    geom_half_violin(side = "r", color = NA, alpha = 0.4) +
    geom_half_boxplot(side = "r", errorbar.draw = FALSE, width = 0.2, linewidth = 0.8) +
    geom_half_point_panel(side = "l", shape = 21, size = 3, color = "white") +
    scale_fill_manual(values = ordercolors) +
    labs(y = score, x = NULL, title = score) +
    rotate_x_text(angle = 45) +
    geom_hline(yintercept = mean(metadata_df[[score]], na.rm = TRUE), linetype = 2) +
    theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
          legend.position = "none",
          legend.title = element_blank(),
           legend.text = element_blank(),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.text = element_text(size = 14, color = "black",family = "Arial"),
           axis.text.x= element_text(color = "black", size = 14, face = "bold",family = "Arial"),
         
          axis.title.y = element_text(color = "black", size = 14, face = "bold",family = "Arial"),
          axis.line = element_line(size = 0.8, colour = "black"))
})
plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

p <- plots[[2]]
p

ggsave("collagencore.pdf", plot =p, width = 10, height =8, units = "in", device = cairo_pdf)
ggsave(
  filename = "collagencore.png",
  plot =  p,
  width = 10,          
  height = 8,         
  dpi = 600           
)
library(scCustomize)
library(dplyr)
library(cowplot)

color_pal <- colorRampPalette(c("#dedeea", "#bcb9d8","#8488b5","#61678b"))(50)

p1 <- FeaturePlot_scCustom(
  seurat_object = fibroblast,
  features = "Collagens_score",
  order = TRUE,
  colors_use = color_pal,
  alpha_exp = 0.4
  #na_cutoff = FALSE
) +
  NoAxes()

arrow <- arrow(angle = 20, type = "closed", length = unit(0.1, "npc"))
umap_coord <- ggplot(tibble(group = c("UMAP_1", "UMAP_2"),
                            x = c(0, 0), xend = c(1, 0),
                            y = c(0, 0), yend = c(0, 1),
                            lx = c(0.5, -0.15), ly = c(-0.15, 0.5),
                            angle = c(0, 90))) +
  geom_segment(aes(x, y, xend = xend, yend = yend, group = group),
               arrow = arrow, size = 1, lineend = "round") +
  geom_text(aes(lx, ly, label = group, angle = angle), size = 4) +
  theme_void() +
  coord_fixed(xlim = c(-0.3, 1), ylim = c(-0.3, 1))
p_full <- ggdraw() +
  draw_plot(p1 + NoAxes(), scale = 0.9) +
  draw_plot(umap_coord, x = 0.05, y = 0.05, width = 0.2, height = 0.2)
p_full 

ggsave(p_full,file="collagens_feature.pdf",width=6,height=6)
ggsave(
  filename = "collagens_feature.png",
  plot =  p_full,
  width = 6,          
  height = 6,         
  dpi = 600           
)
ordercolors<- c("#84c3b7","#b8aeeb","#f57c6e","#f8bb6e","#88d8db","#f2a7da","#add187","#8481ba","#e74434","#f8c7b4","#71b7ed", "#cedfef","#1864aa","#ee7e18","#4a94c6","#fae69e")
net <- decoupleR::get_progeny(top = 2000)
mat2 <- as.matrix(fibroblast@assays$Xenium$data)
net2 <- net %>% filter(target %in% rownames(mat2))
sc_act <- decoupleR:: run_mlm(as.matrix(fibroblast@assays$Xenium$data), net2)
sc_act <- sc_act %>% pivot_wider(id_cols = condition, names_from = source, values_from = score) %>% column_to_rownames("condition")

selected_columns <- c("fourgroups", "subpopulation" ,"Androgen","EGFR","Estrogen","Hypoxia","JAK-STAT","MAPK","NFkB","PI3K","TGFb" ) 
metadata_df <- fibroblast@meta.data
metadata_df_with_score <- cbind(metadata_df, sc_act)
metadata_df_with_score <- metadata_df_with_score[, selected_columns] %>% as.data.frame()
metadata_df_with_score$fourgroups <- factor(metadata_df_with_score$fourgroups,
                    levels = c("SSc_preCART" , "SSc_postCART_early","SSc_postCART_late", "SSc_postCART_superlate"))
colnames(metadata_df_with_score) <- gsub("-", "_", colnames(metadata_df_with_score))  

score_names <- c("Androgen"      ,"EGFR","Estrogen","Hypoxia","JAK_STAT","MAPK","NFkB","PI3K","TGFb" )

plots <- lapply(score_names, function(score) {
  ggplot(data = metadata_df_with_score, aes(x = subpopulation, y = .data[[score]], fill = subpopulation)) +
    geom_half_violin(side = "r", color = NA, alpha = 0.4) +
    geom_half_boxplot(side = "r", errorbar.draw = FALSE, width = 0.2, linewidth = 0.8) +
    geom_half_point_panel(side = "l", shape = 21, size = 3, color = "white") +
    scale_fill_manual(values = ordercolors) +
    labs(y = paste0(score, "_", "activity"), x = NULL, title = score) +
    rotate_x_text(angle = 45) +
    geom_hline(yintercept = mean(metadata_df_with_score[[score]], na.rm = TRUE), linetype = 2) +
    theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
          legend.position = "none",
          legend.title = element_blank(),
           legend.text = element_blank(),
          panel.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.text = element_text(size = 14, color = "black",family = "Arial"),
           axis.text.x= element_text(color = "black", size = 14, face = "bold",family = "Arial"),
         
          axis.title.y = element_text(color = "black", size = 14, face = "bold",family = "Arial"),
          axis.line = element_line(size = 0.8, colour = "black"))
})
plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

p <- plots[[9]]
p <-p+labs(y = "TGFβ_activity")
getwd()
## [1] "/Users/xu/Desktop/script"
ggsave("tgf_vio.pdf", plot =p, width = 10, height =8, units = "in", device = cairo_pdf)
ggsave(
  filename = "tgf_vio.png",
  plot =  p,
  width = 10,          
  height = 8,         
  dpi = 600           
)

metadata_df <- fibroblast@meta.data
metadata_df_with_score <- cbind(metadata_df, sc_act)
fibroblast@meta.data <- metadata_df_with_score
color_pal <- colorRampPalette(c("#dedeea", "#bcb9d8","#8488b5","#61678b"))(50)

p1 <- FeaturePlot_scCustom(
  seurat_object = fibroblast,
  features = "TGFb",
  order = TRUE,
  colors_use = color_pal,
  alpha_exp = 0.4
) +
  NoAxes()
arrow <- arrow(angle = 20, type = "closed", length = unit(0.1, "npc"))
umap_coord <- ggplot(tibble(group = c("UMAP_1", "UMAP_2"),
                            x = c(0, 0), xend = c(1, 0),
                            y = c(0, 0), yend = c(0, 1),
                            lx = c(0.5, -0.15), ly = c(-0.15, 0.5),
                            angle = c(0, 90))) +
  geom_segment(aes(x, y, xend = xend, yend = yend, group = group),
               arrow = arrow, size = 1, lineend = "round") +
  geom_text(aes(lx, ly, label = group, angle = angle), size = 4) +
  theme_void() +
  coord_fixed(xlim = c(-0.3, 1), ylim = c(-0.3, 1))
p_full <- ggdraw() +
  draw_plot(p1 + NoAxes(), scale = 0.9) +
  draw_plot(umap_coord, x = 0.05, y = 0.05, width = 0.2, height = 0.2)
p_full 

ggsave(p_full ,file="tgf_feature.pdf",width=6,height=6)
ggsave(
  filename = "tgf_feature.png",
  plot = p_full,
  width = 6,          
  height = 6,         
  dpi = 600           
)